// Start from an empty session. The tempfile handle "tempsave" is the scratch
// file that the cell-building loops further down write to and read from.
clear all
tempfile tempsave
set seed ${seed}

// Load the Census (1970) extract.
use "${rawdata}census1970/census.dta", clear

// Sample restriction: working-age individuals, 18 to 64 years inclusive.
// Observations with a missing age fall outside the range and are removed too.
tabulate age, missing
keep if inrange(age, 18, 64)
tabulate age, missing

// Keep only those that reported a positive income.
// Step 1: 999999 is recoded to missing so it cannot pass as a real wage.
// Step 2: apply the restriction itself. Previously only the recode was done
//         and nobody was dropped, so the stated restriction had no effect on
//         the sample (incwage is not part of the later missing-value screen).
// NOTE(review): this changes the sample; confirm the restriction is wanted
//               before relying on downstream counts.
sum incwage, det
replace incwage=. if incwage==999999
keep if incwage>0 & !missing(incwage)
sum incwage, det

// Keep only those that are not self-employed: retain classwkr == 2 and remove
// every other class-of-worker value (missing values included).
tabulate classwkr, missing
drop if classwkr != 2
tabulate classwkr, missing

// Industry in 1990 codes, aggregated into 17 sectors stored in ind.
* Compare https://usa.ipums.org/usa/volii/ind1970.shtml
tab ind1990, m
rename ind1990 ind_old

// Build ind from scratch. The extract may or may not already carry a variable
// with exactly this name: if it does, discard it (stale codes and labels);
// if it does not, a bare "replace ind" would either fail or, through variable
// abbreviation, overwrite ind_old. The exact option rules out abbreviation.
capture confirm variable ind, exact
if !_rc {
	drop ind
}
gen ind = .

// Codes 0 and 939 are deliberately left unclassified (missing).
replace ind = .  if inlist(ind_old,0,939)
// (1)  Agriculture, forestry, and fishing
replace ind = 1  if inlist(ind_old,010,011,012,020,030,031,032)
// (2)  Mining
replace ind = 2  if inlist(ind_old,040,041,042,050)
// (3)  Construction
replace ind = 3  if inlist(ind_old,060)
// (4)  Low Tech Manufacturing
replace ind = 4  if inlist(ind_old,100,101,102,110,111,112,120,121,122,130,132,140,141,142,150,151,152,220,221,222,230,231,232,241,242,250,251,252,261,262)
// (5)  Basic Tech Manufacturing
// Code 130 used to be listed here as well as under (4); because this
// statement runs later it silently moved 130 out of Low Tech. The duplicate
// is removed so that each source code belongs to exactly one sector.
replace ind = 5  if inlist(ind_old,160,161,162,171,172,270,271,272,280,281,282,290,291,292,300,301,310,311,312,320,321,322,331,332,340,341,342,350,351,360,361,370,390,391,392)
// (6)  High Tech Manufacturing
replace ind = 6  if inlist(ind_old,180,181,182,190,191,192,200,201,210,211,212,352,362,371,372,380,381)
// (7)  Transportation
replace ind = 7  if inlist(ind_old,400,401,402,410,411,412,420,421,422,432)
// (8)  Communications
replace ind = 8  if inlist(ind_old,440,441,442)
// (9)  Utilities
replace ind = 9  if inlist(ind_old,450,451,452,470,471,472)
// (10) Wholesale Trade
replace ind = 10 if inlist(ind_old,500,501,502,510,511,512,521,530,531,532,540,541,542,550,551,552,560,561,562,571)
// (11) Retail Trade
replace ind = 11 if inlist(ind_old,580,581,582,590,591,592,600,601,602,610,611,612,620,621,622,623,630,631,632,633,640,641,642,650,651,652,660,661,662,663,670,671,672,681,682,691)
// (12) Finance
replace ind = 12 if inlist(ind_old,700,701,702,710,711,712)
// (13) Business and Repair Services
replace ind = 13 if inlist(ind_old,721,722,731,732,740,741,742,750,751,752,760)
// (14) Personal Services
replace ind = 14 if inlist(ind_old,761,762,770,771,772,780,781,782,790,791)
// (15) Entertainment and Recreation Services
replace ind = 15 if inlist(ind_old,800,801,802,810)
// (16) Professional Services
replace ind = 16 if inlist(ind_old,812,820,821,822,830,831,832,840,841,842,850,851,852,860,861,862,863,870,871,872,873,880,881,882,890,891,892,893)
// (17) Public Administration
replace ind = 17 if inlist(ind_old,900,901,910,921,922,930,931,932)
// Any source code not listed above keeps ind missing.
tab ind, m

// Occupation in 1990 Codes, aggregated into 21 groups stored in occ.
tab occ1990, m
rename occ1990 occ_old

// Build occ from scratch. The extract may or may not already carry a variable
// with exactly this name: if it does, discard it (stale codes and labels);
// if it does not, a bare "replace occ" would fail or resolve to another
// variable through abbreviation. The exact option rules out abbreviation.
capture confirm variable occ, exact
if !_rc {
	drop occ
}
gen occ = .

// No occupation or military.
// (A missing occ_old also satisfies "> 900" and therefore stays missing.)
replace occ = .  if occ_old == 0  | occ_old > 900
// (1)  Executive, Administrative, and Managerial Occupations
replace occ = 1  if occ_old > 0   & occ_old < 023
// (2)  Management Related Occupations
replace occ = 2  if occ_old > 022 & occ_old < 038
// (3)  Professional Specialty Occupations
replace occ = 3  if occ_old > 037 & occ_old < 200
// (4)  Technicians and Related Support Occupations
replace occ = 4  if occ_old > 199 & occ_old < 209
// (5)  Technologists and Technicians, Except Health
replace occ = 5  if occ_old > 208 & occ_old < 236
// (6)  Sales Occupations
replace occ = 6  if occ_old > 235 & occ_old < 291
// (7)  Administrative Support Occupations, Including Clerical
replace occ = 7  if occ_old > 290 & occ_old < 392
// (8)  Private Household Occupations
replace occ = 8  if occ_old > 391 & occ_old < 409
// (9)  Protective Service Occupations
replace occ = 9  if occ_old > 408 & occ_old < 428
// (10) Service Occupations, Except Protective and Household
replace occ = 10 if occ_old > 427 & occ_old < 470
// (11) Farm Operators and Managers
replace occ = 11 if occ_old > 469 & occ_old < 477
// (12) Other Agricultural and Related Occupations
replace occ = 12 if occ_old > 476 & occ_old < 500
// (13) Mechanics and Repairers, Except Supervisors
// NOTE(review): codes 500-503 are assigned to no group and therefore stay
// missing, whereas the other groups below use contiguous ranges. Confirm that
// leaving these codes out is intended.
replace occ = 13 if occ_old > 503 & occ_old < 550
// (14) Construction Trades
replace occ = 14 if occ_old > 549 & occ_old < 600
// (15) Extractive Occupations
replace occ = 15 if occ_old > 599 & occ_old < 618
// (16) Precision Production Occupations
replace occ = 16 if occ_old > 617 & occ_old < 700
// (17) Machine Operators, Assemblers, and Inspectors Machine Operators and Tenders, Except Precision
replace occ = 17 if occ_old > 699 & occ_old < 780
// (18) Fabricators, Assemblers, and Hand Working Occupations
replace occ = 18 if occ_old > 779 & occ_old < 796
// (19) Production Inspectors, Testers, Samplers, and Weighers
replace occ = 19 if occ_old > 795 & occ_old < 800
// (20) Transportation and Material Moving Occupations
replace occ = 20 if occ_old > 799 & occ_old < 816
// (21) Transportation Occupations, Except Motor Vehicles
replace occ = 21 if occ_old > 815 & occ_old < 891
// Codes 891-900 match none of the ranges above and stay missing.

tab occ, m

// Education: collapse the detailed attainment codes into three levels.
tabulate educ, missing
rename educ educ_old

// 1 = codes 0-5   below the high-school-graduate code (less than high school)
// 2 = code  6     high school graduate
// 3 = codes 7-11  at least some college
// Any other value, including missing, yields a missing educ.
generate educ = cond(inlist(educ_old,0,1,2,3,4,5), 1, ///
                cond(educ_old == 6, 2, ///
                cond(inlist(educ_old,7,8,9,10,11), 3, .)))

tabulate educ, missing

// Race-Ethnicity: three mutually exclusive groups stored in reth.
tab race, m
tab hispan, m
gen reth = .
// White, non-hispanic
replace reth = 1 if race == 1 & hispan == 0
// Black, non-hispanic
replace reth = 2 if race == 2 & hispan == 0
// Hispanic
// Only explicit Hispanic-origin codes (1-4) qualify. The former test
// "hispan > 0" was also true for a missing hispan (missing sorts above every
// number in Stata) and for any not-reported code above 4, so people of unknown
// origin were counted as Hispanic.
replace reth = 3 if inlist(hispan,1,2,3,4)
// Everyone else (other races with hispan == 0, unknown origin) keeps a missing
// reth.
tab reth, m

// Sex: inspected only, used as coded in the extract.
tabulate sex, missing

// Census Regions, assigned from the state FIPS code.
*Source: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf
tabulate statefip, missing

// One FIPS list per region: 1 = North-East, 2 = Midwest, 3 = South, 4 = West.
local fips_1 9,23,25,33,44,50,34,36,42
local fips_2 18,17,26,39,55,19,20,27,29,31,38,46
local fips_3 10,11,12,13,24,37,45,51,54,1,21,28,47,5,22,40,48
local fips_4 4,8,16,35,30,49,32,56,2,6,15,41,53

// States that appear in none of the lists keep a missing cregion.
generate cregion = .
forvalues k = 1/4 {
	replace cregion = `k' if inlist(statefip,`fips_`k'')
}
tabulate cregion, missing

/*Total*/
// Constant used both as the "national total" classifier and as the variable
// that is counted when cells are collapsed.
gen total=1

// Restrict to observations with no missing classifier, keep only the
// classifiers, and store the analysis file.
local vars ind occ sex educ reth cregion total
gegen obsdrop=rowmiss(`vars')
tab obsdrop, m
keep if obsdrop==0
keep `vars'
save "${temp}census.dta", replace

/*Create Industry-Occupation Shares in 1970 in Sex x Education x Race/Ethnic x Census Region Cells*/
// Record the number of distinct levels of every classifier in a global named
// size_<variable>. The loops below use that number both as the level count and
// as the highest code (they enumerate levels with _n), which is only valid
// when the codes are exactly 1, 2, ..., n. Stop here if that does not hold
// (for example when a category is empty in the data) instead of silently
// building mislabelled cells.
foreach x of local vars{
	gdistinct `x'
	global size_`x'=r(ndistinct)
	quietly summarize `x', meanonly
	assert r(min)==1 & r(max)==${size_`x'}
	assert `x'==floor(`x')
}

// Cell definitions: each group name maps to the list of classifiers that
// span its cells (stored in the local v_<group>).
local groups 	iserc serc ioser iser
local v_iserc 	ind sex educ reth cregion
local v_serc 	sex educ reth cregion
local v_ioser	ind occ sex educ reth
local v_iser	ind sex educ reth


foreach grp of local groups {
	/* Observed 1970 head count in every non-empty cell of this group. */
	use "${temp}census.dta", clear
	gcollapse (count) empl_1970_`grp' = total, by(`v_`grp'')
	display "`grp'"
	save "`tempsave'", replace

	/* Skeleton holding every combination of classifier levels: start from a
	   single row and, one classifier at a time, multiply the rows by its
	   number of levels and number the copies 1..size inside each
	   combination of the classifiers placed so far. */
	clear
	set obs 1
	generate year = 1970
	local placed
	foreach dim of local v_`grp' {
		expand ${size_`dim'}
		bysort year `placed': generate `dim' = _n
		local placed `placed' `dim'
	}

	/* Attach the observed counts; cells absent from the data get a 0. */
	merge 1:1 `v_`grp'' using "`tempsave'", nogenerate
	replace empl_1970_`grp' = 0 if missing(empl_1970_`grp')
	save "${temp}base_`grp'.dta", replace
}

/* National 1970 employment counts by industry / occupation, computed
   separately for every census region with that region's own observations
   left out (jackknife exclusion of regional units). */
local groups 	io i o tot
local v_io		ind occ
local v_i 		ind
local v_o		occ
local v_tot 	total

foreach grp of local groups {
	/* Leave-one-region-out counts, stacked region by region in tempsave. */
	use cregion `v_`grp'' total using "${temp}census.dta", clear
	forvalues left_out = 1/${size_cregion} {

		preserve
		display "Region:" "`left_out'"

		/* Count cells over all regions except the current one, then tag
		   the result with the region it was computed for. */
		drop if cregion == `left_out'
		gcollapse (count) empl_1970_`grp' = total, by(`v_`grp'')
		generate cregion = `left_out'

		keep cregion empl_* `v_`grp''

		/* The first region starts a fresh file; later ones are stacked on
		   top of what has been saved so far. */
		if `left_out' > 1 {
			append using "`tempsave'"
		}
		save "`tempsave'", replace
		restore
	}

	/* Skeleton with every region x classifier-level combination, built by
	   repeatedly multiplying the rows and numbering the copies. */
	clear
	set obs 1
	generate year = 1970
	expand ${size_cregion}
	bysort year: generate cregion = _n
	local placed
	foreach dim of local v_`grp' {
		expand ${size_`dim'}
		bysort year cregion `placed': generate `dim' = _n
		local placed `placed' `dim'
	}

	/* Attach the counts; cells absent from the data get a 0. */
	merge 1:1 `v_`grp'' cregion using "`tempsave'", nogenerate
	replace empl_1970_`grp' = 0 if missing(empl_1970_`grp')
	save "${temp}base_`grp'.dta", replace
}

